@inproceedings{Zhao_2022,
  author    = {Zhao, Mark and Agarwal, Niket and Basant, Aarti and Gedik, Buğra and Pan, Satadru and Ozdal, Mustafa and Komuravelli, Rakesh and Pan, Jerry and Bao, Tianshu and Lu, Haowei and Narayanan, Sundaram and Langman, Jack and Wilfong, Kevin and Rastogi, Harsha and Wu, Carole-Jean and Kozyrakis, Christos and Pol, Parik},
  title     = {Understanding Data Storage and Ingestion for Large-Scale Deep Recommendation Model Training: Industrial Product},
  booktitle = {Proceedings of the 49th Annual International Symposium on Computer Architecture},
  series    = {ISCA '22},
  pages     = {1042--1057},
  publisher = {ACM},
  year      = {2022},
  month     = jun,
  doi       = {10.1145/3470496.3533044},
}


@misc{zhao2024cedaroptimizedunifiedmachine,
  title         = {{cedar}: Optimized and Unified Machine Learning Input Data Pipelines},
  author        = {Zhao, Mark and Adamiak, Emanuel and Kozyrakis, Christos},
  year          = {2024},
  eprint        = {2401.08895},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/2401.08895},
}

@misc{liang2024resourceallocationworkloadscheduling,
  title         = {Resource Allocation and Workload Scheduling for Large-Scale Distributed Deep Learning: A Survey},
  author        = {Feng Liang and Zhen Zhang and Haifeng Lu and Chengming Li and Victor C. M. Leung and Yanyi Guo and Xiping Hu},
  year          = {2024},
  eprint        = {2406.08115},
  archivePrefix = {arXiv},
  primaryClass  = {cs.DC},
  url           = {https://arxiv.org/abs/2406.08115},
}

@misc{vellaisamy2025characterizingoptimizingllminference,
  title         = {Characterizing and Optimizing {LLM} Inference Workloads on {CPU}-{GPU} Coupled Architectures},
  author        = {Prabhu Vellaisamy and Thomas Labonte and Sourav Chakraborty and Matt Turner and Samantika Sury and John Paul Shen},
  year          = {2025},
  eprint        = {2504.11750},
  archivePrefix = {arXiv},
  primaryClass  = {cs.DC},
  url           = {https://arxiv.org/abs/2504.11750},
}

@misc{asgar2025efficientscalableagenticai,
  title         = {Efficient and Scalable Agentic {AI} with Heterogeneous Systems},
  author        = {Zain Asgar and Michelle Nguyen and Sachin Katti},
  year          = {2025},
  eprint        = {2507.19635},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/2507.19635},
}

@inproceedings{276938,
  author    = {Qizhen Weng and Wencong Xiao and Yinghao Yu and Wei Wang and Cheng Wang and Jian He and Yong Li and Liping Zhang and Wei Lin and Yu Ding},
  title     = {{MLaaS} in the Wild: Workload Analysis and Scheduling in {Large-Scale} Heterogeneous {GPU} Clusters},
  booktitle = {19th USENIX Symposium on Networked Systems Design and Implementation (NSDI 22)},
  pages     = {945--960},
  publisher = {USENIX Association},
  address   = {Renton, WA},
  isbn      = {978-1-939133-27-4},
  year      = {2022},
  month     = apr,
  url       = {https://www.usenix.org/conference/nsdi22/presentation/weng},
}

@inproceedings{10.1145/3458817.3476223,
  author    = {Hu, Qinghao and Sun, Peng and Yan, Shengen and Wen, Yonggang and Zhang, Tianwei},
  title     = {Characterization and Prediction of Deep Learning Workloads in Large-Scale {GPU} Datacenters},
  year      = {2021},
  isbn      = {9781450384421},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  doi       = {10.1145/3458817.3476223},
  abstract  = {Modern GPU datacenters are critical for delivering Deep Learning (DL) models and services in both the research community and industry. When operating a datacenter, optimization of resource scheduling and management can bring significant financial benefits. Achieving this goal requires a deep understanding of the job features and user behaviors. We present a comprehensive study about the characteristics of DL jobs and resource management. First, we perform a large-scale analysis of real-world job traces from SenseTime. We uncover some interesting conclusions from the perspectives of clusters, jobs and users, which can facilitate the cluster system designs. Second, we introduce a general-purpose framework, which manages resources based on historical data. As case studies, we design (1) a Quasi-Shortest-Service-First scheduling service, which can minimize the cluster-wide average job completion time by up to 6.5\texttimes{}; (2) a Cluster Energy Saving service, which improves overall cluster utilization by up to 13\%.},
  booktitle = {Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis},
  articleno = {104},
  numpages  = {15},
  keywords  = {GPU datacenter, cluster management system, cluster statistical analysis, deep learning training, energy conservation, time-series prediction, workload scheduling},
  location  = {St. Louis, Missouri},
  series    = {SC '21},
}

@misc{xiaohongshu2024volcano,
  title  = {小红书搜索推荐广告场景下的大数据架构},
  author = {{Xiaohongshu}},
  year   = {2024},
  url    = {https://volcano.sh/zh/blog/xiaohongshu},
  note   = {Big data architecture for search, recommendation and advertising scenarios},
}

@misc{nvidia2024recsys,
  title  = {Recommender Systems Best Practices},
  author = {{NVIDIA}},
  year   = {2024},
  url    = {https://docs.nvidia.com/deeplearning/performance/recsys-best-practices/index.html},
  note   = {NVIDIA's recommendation system best practices and implementation guide},
}

@misc{nvidia2024merlin,
  title  = {Recommender Systems: Not Just Recommender Models},
  author = {{NVIDIA}},
  year   = {2024},
  url    = {https://medium.com/nvidia-merlin/recommender-systems-not-just-recommender-models-485c161c755e},
  note   = {NVIDIA Merlin framework for recommendation systems},
}

@misc{alibaba2024clusterdata,
  title  = {Alibaba Cluster Data Repository},
  author = {{Alibaba Group}},
  year   = {2024},
  url    = {https://github.com/alibaba/clusterdata},
  note   = {Comprehensive workload traces from Alibaba clusters covering 2017-2025},
}

@techreport{academia2024workload,
  title       = {Workload Characterization in {LLM} Application Scenarios},
  author      = {{Research Community}},
  institution = {Academic Research},
  year        = {2024},
  note        = {Comprehensive analysis of workload patterns in LLM-based recommendation systems, Agentic AI, and distributed database systems},
}
